0. Load data
# Load data
data <- readRDS("../../output/step_1/finalData.RDS")
dataMatrix <- readRDS("../../output/step_1/dataMatrix.RDS")
keyInfo <- readRDS("../../output/step_1/keyInfo.RDS")
metaInfo <- readRDS("../../output/step_1/metaInfo.RDS")
# Add 'Age' column
matchTable <- data.frame("age"=c("60", "44", "31", "25", "25", "43"),
"SampleID"=c("105", "218", "261", "043", "160", "149"),
stringsAsFactors=FALSE)
keyInfo <- merge(keyInfo, matchTable, all.x=TRUE)
# Remove redundant data
rm(matchTable)
2. Visualize top 5 most significant methylation positions (rows)
# Separate data in two groups by age
firstGroup <- keyInfo[keyInfo$age < 35, ]$Sample_Name
secondGroup <- keyInfo[keyInfo$age >= 35, ]$Sample_Name
# Obtain most significant 5
topRows <- pValues[order(pValues$pValue), ]
topRows <- head(topRows, 5)
# Obtain these rows from dataMatrix
topRowsData <- dataMatrix[match(rownames(topRows), rownames(dataMatrix)),
order(keyInfo$age)]
# Change the column names so that classification can be done
for(i in 1:55) {
if(colnames(topRowsData)[i] %in% firstGroup) {
colnames(topRowsData)[i] <- "first"
}
else {
colnames(topRowsData)[i] <- "second"
}
}
# Generate plots
for(i in 1:5) {
plot(
topRowsData[i, ],
main = rownames(topRowsData)[i],
ylab = "Methylation level",
col = as.factor(colnames(topRowsData))
)
legend("topright",
inset=.02,
title="Age",
c("< 35", ">= 35"),
fill=c("red", "black"),
horiz=TRUE,
cex=0.8)
}





# Remove redundant data
rm(firstGroup)
rm(secondGroup)
rm(topRows)
rm(i)
3. Provide a table with number of significant rows at the following levels
# Produce a table from dataframe
valuesTable <- data.frame(
"levels" = c("alpha = 0.1", "alpha = 0.05",
"alpha = 0.01", "FDR correction = 0.05",
"Bonferroni correction = 0.05"),
"significantRows" = c(sum(pValues$pValue < 0.1),
sum(pValues$pValue < 0.05),
sum(pValues$pValue < 0.01),
sum(p.adjust(pValues$pValue,
method = "fdr") < 0.05),
sum(p.adjust(pValues$pValue,
method = "bonferroni") < 0.05)
)
)
# Generate the table
kable(valuesTable, "html") %>%
kable_styling(font_size = 10) %>%
kable_styling("striped") %>%
scroll_box(width = "100%")
|
levels
|
significantRows
|
|
alpha = 0.1
|
53307
|
|
alpha = 0.05
|
29068
|
|
alpha = 0.01
|
6159
|
|
FDR correction = 0.05
|
0
|
|
Bonferroni correction = 0.05
|
0
|
4. Visualize the histogram
hist(
pValues$pValue,
main = "p-values",
xlab = "p-value",
col = c("gray", "lightpink")
)

5. Visualize the volcano-plot
plot(
pValues$effectSize,
-log(pValues$pValue),
main = "Volcano plot",
xlab = "Effect size",
ylab = "p-value log",
col = c("gray", "lightpink")
)

6. Visualize a manhattan plot
plot(
x = metaInfo$pos[metaInfo$chr == "chrX"],
y = -log10(pValues$pValue[metaInfo$chr == "chrX"]),
main = "Manhattan plot",
xlab = "Chromosome X positions",
ylab = "-log10",
col = c("gray", "lightpink")
)

7. Perform the first step using a linear model ???